********************************************************************************
* 07_figures.do
* Generate All Figures for JHR Paper
* "Teacher Testing Standards and the New Teacher Pipeline"
* Law, Marks, and Stern
*
* Figures Generated:
*   - Figure 1: Test Difficulty Index Over Time
*   - Figure 2A: Delta TDI vs % Change in Enrollments
*   - Figure 2B: Delta TDI vs % Change in Graduations
*   - Figure 3: Education Major Enrollments Event Study
*   - Figure 4: Teacher Preparation Graduations Event Study
*   - Figure 5A-C: Placebo Event Studies
*   - Figure 6: Log New Teacher Licenses Event Study
*   - Figure 7: Teacher Shortage Areas Event Study
*   - Figure A2: Title II Graduations Event Study
*   - Figure A3: Delta TDI vs % Change in Licenses
*
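* Inputs: CSV outputs from 05 and 06 scripts (output/tables/), cleaned data
*         (data/cleaned/), and raw Title II / license workbooks (data/raw/)
* Outputs: PDF + PNG figures in output/figures/, plus the intermediate
*          output/tables/titleII_grad_event_study_parsed.csv used for Figure A2
*
* Required: coefplot (ssc install coefplot); the script tries to install it if missing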
********************************************************************************

* Session setup: reset Stata, make sure the output folders exist, check for
* coefplot, and fix the graph appearance used by every figure below.
clear all
set more off

display "========================================================"
display "07: Generating Figures"
display "========================================================"

* mkdir errors when the folder already exists, so its return code is ignored
capture mkdir "output"
capture mkdir "output/figures"

* Ensure coefplot is installed.
* No command in this file calls coefplot (the event-study figures are drawn
* with twoway), so a failed install, e.g. on a machine without network access,
* is reported but must not abort figure generation.
capture which coefplot
if _rc {
    display "  Installing coefplot..."
    capture noisily ssc install coefplot, replace
    if _rc {
        display "  Warning: coefplot could not be installed; continuing without it"
    }
}

* Common graph settings: black and white, clean
set scheme s2mono
graph set window fontface "Arial"

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 1: Test Difficulty Index Over Time
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Figure 1: TDI Over Time..."

* Source workbook and export stem for this figure
local tdi_xlsx "data/cleaned/ets_treatment_data.xlsx"
local fig1_stem "output/figures/figure_1_tdi_over_time"

capture confirm file "`tdi_xlsx'"
if _rc != 0 {
    display "  Skipping Figure 1: ets_treatment_data.xlsx not found"
}
else {
    import excel "`tdi_xlsx'", firstrow clear

    * Sample restriction: every state except ND, years through 2018
    keep if State != "ND" & year <= 2018

    * Only the state, the year and the plotted index are needed
    keep State year test_index

    * xtline needs a numeric panel variable, so encode the state string
    encode State, generate(state_id)
    xtset state_id year

    * One line per state on shared axes, with a dashed vertical marker at 2013.5
    xtline test_index, overlay ///
        xline(2013.5, lpattern(dash) lcolor(black)) ///
        xlabel(2008(2)2018) ///
        xtitle("Academic Year") ytitle("Test Difficulty Index (TDI)") ///
        note("Dashed line: Praxis Core replaces PPST") ///
        legend(off) ///
        plotregion(color(white)) graphregion(color(white))

    * Vector and raster copies of the same graph
    graph export "`fig1_stem'.pdf", replace
    graph export "`fig1_stem'.png", replace width(2400)
    display "  Saved: figure_1_tdi_over_time"
}

* ══════════════════════════════════════════════════════════════════════════════
* FIGURES 2A & 2B: Scatter Plots (Delta TDI vs Outcomes)
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Figures 2A & 2B: Scatter Plots..."

* --- Figure 2A: Delta TDI vs % Change in Enrollments ---
* Scatter of each state's 2012-2016 percent change in summed enrollments
* (eftotlt) against continuous_treat, with a linear fit and state labels.
capture confirm file "data/cleaned/enrollment_event_data.xlsx"
if _rc == 0 {
    import excel "data/cleaned/enrollment_event_data.xlsx", firstrow clear

    * Only the 2012 and 2016 totals enter the percent change, so drop every
    * other year before collapsing (same approach as Figure A3). This keeps
    * the reshape to two columns and stops rows with a missing year from
    * breaking it.
    keep if year == 2012 | year == 2016

    * State-level collapse: sum enrollments by state-year
    collapse (sum) eftotlt (first) continuous_treat, by(State year)

    * Calculate % change from 2012 to 2016
    reshape wide eftotlt, i(State continuous_treat) j(year)
    gen pct_change = (eftotlt2016 - eftotlt2012) / eftotlt2012 * 100
    keep State continuous_treat pct_change
    * States lacking either year (or with a zero 2012 base) have no value
    drop if pct_change == .

    * Scatter with fitted line and state labels
    twoway (scatter pct_change continuous_treat, mlabel(State) mlabposition(3) ///
            mcolor(black) msymbol(O) mlabcolor(black)) ///
           (lfit pct_change continuous_treat, lcolor(black) lwidth(medium)), ///
        ytitle("% Change in Enrollments (2012-2016)") ///
        xtitle("Change in Test Difficulty Index (2012-2014)") ///
        legend(off) ///
        graphregion(color(white)) plotregion(color(white))

    graph export "output/figures/figure_2a_scatter_enrollments.pdf", replace
    graph export "output/figures/figure_2a_scatter_enrollments.png", replace width(2400)
    display "  Saved: figure_2a_scatter_enrollments"
}
else {
    * Report the skip, as the other figures do, instead of passing silently
    display "  Skipping Figure 2A: enrollment_event_data.xlsx not found"
}

* --- Figure 2B: Delta TDI vs % Change in Graduations ---
* Scatter of each state's 2012-2016 percent change in summed completions
* (ctotalt) against continuous_treat, with a linear fit and state labels.
capture confirm file "data/cleaned/graduation_event_data.xlsx"
if _rc == 0 {
    import excel "data/cleaned/graduation_event_data.xlsx", firstrow clear

    * Only the 2012 and 2016 totals enter the percent change, so drop every
    * other year before collapsing (same approach as Figure A3). This keeps
    * the reshape to two columns and stops rows with a missing year from
    * breaking it.
    keep if year == 2012 | year == 2016

    * State-level collapse: sum completions by state-year
    collapse (sum) ctotalt (first) continuous_treat, by(State year)

    * Calculate % change from 2012 to 2016
    reshape wide ctotalt, i(State continuous_treat) j(year)
    gen pct_change = (ctotalt2016 - ctotalt2012) / ctotalt2012 * 100
    keep State continuous_treat pct_change
    * States lacking either year (or with a zero 2012 base) have no value
    drop if pct_change == .

    * Scatter with fitted line and state labels
    twoway (scatter pct_change continuous_treat, mlabel(State) mlabposition(3) ///
            mcolor(black) msymbol(O) mlabcolor(black)) ///
           (lfit pct_change continuous_treat, lcolor(black) lwidth(medium)), ///
        ytitle("% Change in Graduations (2012-2016)") ///
        xtitle("Change in Test Difficulty Index (2012-2014)") ///
        legend(off) ///
        graphregion(color(white)) plotregion(color(white))

    graph export "output/figures/figure_2b_scatter_graduations.pdf", replace
    graph export "output/figures/figure_2b_scatter_graduations.png", replace width(2400)
    display "  Saved: figure_2b_scatter_graduations"
}
else {
    * Report the skip, as the other figures do, instead of passing silently
    display "  Skipping Figure 2B: graduation_event_data.xlsx not found"
}

* ══════════════════════════════════════════════════════════════════════════════
* Helper program: Read event study CSV and create coefficient plot
* ══════════════════════════════════════════════════════════════════════════════

* plot_event_study: draw an event-study coefficient plot from a CSV of estimates
* and export it as PDF and PNG under output/figures/.
*
* Positional arguments:
*   csv_file        path of the CSV to import; must provide numeric columns
*                   year and estimate, plus either confidence bounds or a
*                   standard error (several header spellings are accepted)
*   out_name        file stem of the exported figure (no extension)
*   ytitle          y-axis title
*   treatment_year  x position of the dashed vertical line
*   shade_start     x position where the grey band starts; "." or empty
*                   means no band. The band runs to the last year plus 0.5
*   ymin, ymax      y-axis range, also the vertical extent of the grey band
*
* Side effects: replaces the data in memory and overwrites the two exports.
capture program drop plot_event_study
program define plot_event_study
    args csv_file out_name ytitle treatment_year shade_start ymin ymax

    * Import CSV
    import delimited "`csv_file'", clear

    * Rename if needed: handle both Stata 14-15 (dots removed) and 16+ (dots to underscores).
    * Each rename is captured because only one spelling is expected to exist.
    capture rename stderror se
    capture rename std_error se
    capture rename stderr se
    capture rename conflow conf_low_v
    capture rename conf_low conf_low_v
    capture rename confhigh conf_high_v
    capture rename conf_high conf_high_v

    * year and estimate are plotted directly, so fail early with a clear
    * message instead of an opaque error from sort or twoway further down
    capture confirm numeric variable year estimate
    if _rc {
        local rc = _rc
        display as error "  plot_event_study: `csv_file' must contain numeric year and estimate columns"
        exit `rc'
    }

    * Build the confidence bounds unless BOTH are already present. Checking
    * only the lower bound would let rcap fail when the upper one is missing.
    capture confirm variable conf_low_v conf_high_v
    if _rc {
        capture confirm variable se
        if _rc {
            * No SE variable found: the bounds collapse onto the estimate
            display "  Warning: could not identify SE variable"
            gen se = 0
        }
        * Discard a lone surviving bound so both come from the same formula
        capture drop conf_low_v
        capture drop conf_high_v
        gen conf_low_v = estimate - 1.96 * se
        gen conf_high_v = estimate + 1.96 * se
    }

    * Sort by year
    sort year

    * The plot is assembled layer by layer; earlier layers are drawn first
    local plot_cmd ""

    * Add shading for post-treatment graduation lag if specified.
    * An area plot fills between its line and base(), which defaults to 0.
    * Tracing the rectangle with four points and the default base therefore
    * filled only the part between 0 and ymax whenever ymin is negative.
    * A flat line at ymax with base(ymin) fills the whole band.
    if "`shade_start'" != "" & "`shade_start'" != "." {
        summarize year, meanonly
        local shade_end = r(max) + 0.5
        local plot_cmd `"`plot_cmd' (scatteri `ymax' `shade_start' `ymax' `shade_end', recast(area) base(`ymin') color(gs14) fcolor(gs14))"'
    }

    * Confidence intervals as rcap
    local plot_cmd `"`plot_cmd' (rcap conf_low_v conf_high_v year, lcolor(black) lwidth(thin))"'

    * Point estimates as scatter
    local plot_cmd `"`plot_cmd' (scatter estimate year, mcolor(black) msymbol(O) msize(medium))"'

    * Execute the plot: zero reference line, dashed treatment marker, fixed y range
    twoway `plot_cmd', ///
        yline(0, lcolor(black) lwidth(vthin)) ///
        xline(`treatment_year', lpattern(dash) lcolor(black) lwidth(thin)) ///
        ytitle("`ytitle'") xtitle("Year") ///
        ylabel(, angle(horizontal)) ///
        yscale(range(`ymin' `ymax')) ///
        legend(off) ///
        graphregion(color(white)) plotregion(color(white))

    graph export "output/figures/`out_name'.pdf", replace
    graph export "output/figures/`out_name'.png", replace width(2400)
    display "  Saved: `out_name'"
end

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 3: Education Major Enrollments Event Study
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Figure 3: Enrollments Event Study..."

* Coefficient table behind this figure
local fig3_csv "output/tables/composite_enrollments_event_study_total.csv"

capture confirm file "`fig3_csv'"
if _rc != 0 {
    display "  Skipping: CSV not found"
}
else {
    * Treatment line at 2013, no shaded band ("."), y-axis from -0.65 to 0.30
    plot_event_study "`fig3_csv'" "figure_3_enrollments_event_study" ///
        "Log Enrollment Estimate" 2013 "." -0.65 0.30
}

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 4: Teacher Preparation Graduations Event Study
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Figure 4: Graduations Event Study..."

* Coefficient table behind this figure
local fig4_csv "output/tables/composite_graduations_event_study_total.csv"

capture confirm file "`fig4_csv'"
if _rc != 0 {
    display "  Skipping: CSV not found"
}
else {
    * Treatment line at 2013, shaded band from 2015, y-axis from -0.65 to 0.30
    plot_event_study "`fig4_csv'" "figure_4_graduations_event_study" ///
        "Log Graduation Estimate" 2013 2015 -0.65 0.30
}

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 5A: Non-Education Enrollments Placebo
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Figure 5A: Non-Ed Enrollments Placebo..."

* Coefficient table behind this figure
local fig5a_csv "output/tables/enrollments_placebo_event_study_total.csv"

capture confirm file "`fig5a_csv'"
if _rc != 0 {
    display "  Skipping: CSV not found"
}
else {
    * Treatment line at 2013, no shaded band ("."), y-axis from -0.40 to 0.30
    plot_event_study "`fig5a_csv'" "figure_5a_placebo_enrollments" ///
        "Log Non-Education Enrollment Estimate" 2013 "." -0.40 0.30
}

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 5B: Non-Education Completions Placebo
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Figure 5B: Non-Ed Completions Placebo..."

* Coefficient table behind this figure
local fig5b_csv "output/tables/placebo_non_ed_completions.csv"

capture confirm file "`fig5b_csv'"
if _rc != 0 {
    display "  Skipping: CSV not found"
}
else {
    * Treatment line at 2013, shaded band from 2015, y-axis from -0.20 to 0.30
    plot_event_study "`fig5b_csv'" "figure_5b_placebo_graduations" ///
        "Log Non-Education Graduation Estimate" 2013 2015 -0.20 0.30
}

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 5C: Other Education Completions Placebo
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Figure 5C: Other Ed Completions Placebo..."

* Coefficient table behind this figure
local fig5c_csv "output/tables/placebo_other_ed_completions.csv"

capture confirm file "`fig5c_csv'"
if _rc != 0 {
    display "  Skipping: CSV not found"
}
else {
    * Treatment line at 2013, shaded band from 2015, y-axis from -0.35 to 0.55
    plot_event_study "`fig5c_csv'" "figure_5c_other_education_graduations" ///
        "Log Other Education Graduation Estimate" 2013 2015 -0.35 0.55
}

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 6: Log New Teacher Licenses Event Study
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Figure 6: Licenses Event Study..."

* Coefficient table behind this figure
local fig6_csv "output/tables/state_licenses_event_study_coefficients.csv"

capture confirm file "`fig6_csv'"
if _rc != 0 {
    display "  Skipping: CSV not found"
}
else {
    * Treatment line at 2013, shaded band from 2015, y-axis from -1.0 to 0.50
    plot_event_study "`fig6_csv'" "figure_6_licenses_event_study" ///
        "Log License Estimate" 2013 2015 -1.0 0.50
}

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 7: Teacher Shortage Areas Event Study
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Figure 7: Shortages Event Study..."

* Coefficient table behind this figure
local fig7_csv "output/tables/teacher_shortage_event_study.csv"

capture confirm file "`fig7_csv'"
if _rc != 0 {
    display "  Skipping: CSV not found"
}
else {
    * Treatment line at 2013, shaded band from 2015, y-axis from -1.0 to 2.1
    plot_event_study "`fig7_csv'" "figure_7_shortages_event_study" ///
        "Log Teacher Shortage Estimate" 2013 2015 -1.0 2.1
}

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE A2: Title II Graduations Event Study
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Figure A2: Title II Graduations Event Study..."

* Read from Stata's own outreg2 output or from the JHR results folder
local titleII_file "data/raw/title_ii_results/title_II_graduations_event_study.xlsx"
capture confirm file "`titleII_file'"
if _rc == 0 {
    * Parse the Stata outreg2 xlsx to extract coefficients.
    * Imported without firstrow, so the columns are named A, B, ...
    import excel "`titleII_file'", clear

    * The parsed table is written to output/tables, which this script does not
    * otherwise create; without the folder, file open below would fail.
    capture mkdir "output/tables"

    * Look for year_ variables and extract coefficient and SE
    tempname fh
    file open `fh' using "output/tables/titleII_grad_event_study_parsed.csv", write replace
    file write `fh' "year,estimate,std_error,conf_low,conf_high" _n
    * 2012 is written as an all-zero row (presumably the omitted reference
    * year; confirm against the regression script)
    file write `fh' "2012,0,0,0,0" _n

    * Count parsed coefficients so an unexpected layout does not go unnoticed
    local n_parsed = 0

    local N = _N
    forvalues i = 1/`N' {
        local val = A[`i']
        * A row labelled year_YYYY in column A carries the coefficient in
        * column B; the cell directly below it is read as the standard error
        if regexm("`val'", "^year_([0-9]+)$") {
            local yr = regexs(1)
            if `yr' != 2012 {
                local j = `i' + 1
                local coef_str = B[`i']
                local se_str = B[`j']
                * Remove stars and parens
                local coef_str = subinstr("`coef_str'", "*", "", .)
                local se_str = subinstr("`se_str'", "(", "", .)
                local se_str = subinstr("`se_str'", ")", "", .)
                local b = real("`coef_str'")
                local se = real("`se_str'")
                * Rows whose cells do not convert to numbers are skipped
                if `b' != . & `se' != . {
                    local ci_lo = `b' - 1.96 * `se'
                    local ci_hi = `b' + 1.96 * `se'
                    file write `fh' "`yr',`b',`se',`ci_lo',`ci_hi'" _n
                    local n_parsed = `n_parsed' + 1
                }
            }
        }
    }
    file close `fh'

    if `n_parsed' == 0 {
        display "  Warning: no year_ coefficients parsed from the Title II xlsx; Figure A2 will show only the 2012 baseline"
    }

    * Treatment line at 2013, shaded band from 2015, y-axis from -1.0 to 0.5
    plot_event_study ///
        "output/tables/titleII_grad_event_study_parsed.csv" ///
        "figure_a2_titleII_graduations" ///
        "Log Graduation Estimate" ///
        2013 2015 -1.0 0.5

}
else {
    display "  Skipping Figure A2: Title II event study xlsx not found"
}

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE A3: Delta TDI vs % Change in Licenses (Scatter)
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Figure A3: Scatter - Delta TDI vs Licenses..."

* Source workbook and export stem for this figure
local license_file "data/raw/licenses/state_data_clean.xlsx"
local figa3_stem "output/figures/figure_a3_scatter_licenses"

capture confirm file "`license_file'"
if _rc != 0 {
    display "  Skipping Figure A3: License data not found"
}
else {
    import excel "`license_file'", firstrow clear
    keep State year licenses continuous_treat

    * Keep non-missing license counts for the two comparison years only
    keep if licenses != . & inlist(year, 2012, 2016)

    * One row per state, then the % change from 2012 to 2016
    reshape wide licenses, i(State continuous_treat) j(year)
    gen pct_change = (licenses2016 - licenses2012) / licenses2012 * 100
    drop licenses2012 licenses2016
    keep if pct_change != .

    * Labelled scatter with a linear fit drawn on top
    twoway (scatter pct_change continuous_treat, msymbol(O) mcolor(black) ///
            mlabel(State) mlabposition(3) mlabcolor(black)) ///
           (lfit pct_change continuous_treat, lwidth(medium) lcolor(black)), ///
        xtitle("Change in Test Difficulty Index (2012-2014)") ///
        ytitle("% Change in New Teacher Licenses (2012-2016)") ///
        legend(off) ///
        plotregion(color(white)) graphregion(color(white))

    graph export "`figa3_stem'.pdf", replace
    graph export "`figa3_stem'.png", replace width(2400)
    display "  Saved: figure_a3_scatter_licenses"
}

* ──────────────────────────────────────────────────────────────────────────────
* Summary
* ──────────────────────────────────────────────────────────────────────────────

display ""

* Closing banner followed by the list of figures this script can produce
local rule "========================================================"
display "`rule'"
display "07: Figures Summary"
display "`rule'"
display "Created (where data available):"

* One bullet per figure, in the order the sections above run
foreach fig_label in ///
    "Figure 1: TDI Over Time" ///
    "Figure 2A: Scatter - Delta TDI vs Enrollments" ///
    "Figure 2B: Scatter - Delta TDI vs Graduations" ///
    "Figure 3: Enrollments Event Study" ///
    "Figure 4: Graduations Event Study" ///
    "Figure 5A: Non-Ed Enrollments Placebo" ///
    "Figure 5B: Non-Ed Completions Placebo" ///
    "Figure 5C: Other Ed Completions Placebo" ///
    "Figure 6: Licenses Event Study" ///
    "Figure 7: Shortages Event Study" ///
    "Figure A2: Title II Graduations Event Study" ///
    "Figure A3: Scatter - Delta TDI vs Licenses" {
    display "  - `fig_label'"
}

display ""
display "07_figures.do complete."
